* Firm organization with multiple establishments
* Section V: Figure VIIIa, Appendix Table D.5
* Directly affected establishments

* Session setup: empty the session, fix settings, open the log and load the
* firm-level analysis file.
clear all 
set matsize 2000
* Do not pause output when the screen is full
set more off

* Close a log left open by an earlier run; capture swallows the error if none is open
capture log close
log using log/21a_train_layer-firm_est-direct_pretrends.log, replace

use data/train_analysis_firm.dta, clear

********************************************************************************
***	Data preparation

* Sample restriction on hauptbet, d_traindata and entryyear.
* NOTE(review): hauptbet == 0 presumably selects non-headquarter establishments -- confirm against the codebook.
keep if hauptbet == 0 & d_traindata == 1 & entryyear < 2006

***	Restrict sample to counties that ever have a treated establishment
* County-level (ao_kreis) maximum of each ever_faster_* flag
foreach route in early mid late any {
	bys ao_kreis: egen ever_treat_cty_`route' = max(ever_faster_`route')
	}

count
keep if ever_treat_cty_any == 1
count

***	Define interaction terms
* Only the dummy for 2000 is created here; the loops below also reference
* d_year2001 to d_year2010, which must already exist in the input file.
gen d_year2000 = (jahr == 2000)
foreach route in early mid late any {
	forvalues t=2000/2010{
		gen int_`route'_y`t' = ever_faster_`route' * d_year`t'
		}
	}
* Remove one interaction per route: 2002 (early), 2004 (mid), 2006 (late) and 2000 (any).
* NOTE(review): presumably these are the omitted reference years -- confirm.
drop int_any_y2000 int_early_y2002 int_mid_y2004 int_late_y2006

* Same interactions based on the firm-level flags firm_ever_faster_*
foreach route in early mid late any {
	forvalues t=2000/2010{
		gen int_firm_`route'_y`t' = firm_ever_faster_`route' * d_year`t'
		}
	}
drop int_firm_any_y2000 int_firm_early_y2002 int_firm_mid_y2004 int_firm_late_y2006

* Drop the two flag families in separate commands. A single drop with both
* patterns aborts without dropping anything when one pattern matches no
* variable; capture would hide that error and the egen calls below would then
* fail because a flag variable still exists.
cap drop flg_est*
cap drop flg_unt*
* Tag one observation per establishment-year, firm-year and establishment
egen flg_estjhr = tag(betnr jahr)
egen flg_untjhr = tag(untid jahr)
egen flg_est = tag(betnr)
count

* Prepared file that both analysis sections below reload
save data/train_analysis_firm_pretrends.dta, replace

********************************************************************************
***	Table D.5, top left
***	Pretrends, full sample, directly affected establishments

* Reload the prepared file so this section starts from the full prepared sample
use data/train_analysis_firm_pretrends.dta, clear

count
* Latest calendar year in which each establishment (betnr) appears
bys betnr: egen max_year = max(jahr)

***	Build data set

//	15 counties are affected by routes 1 and 3: 
//	assign 2000-2005 to FFM-CLG, 0 = 2002; assign 2003-2010 to ING-NUR, 0 = 2006
* Counties with early and late (but no mid) treatment, early window:
* calendar years before 2006, event time centred on 2002.
preserve
keep if ever_treat_cty_early == 1 & ever_treat_cty_mid == 0 & ever_treat_cty_late == 1
keep if jahr < 2006
* Establishment enters before 2002 and is still observed after 2002
keep if entryyear < 2002 & max_year > 2002
gen year_cent = .
* The condition holds for every remaining row because of the first keep
replace year_cent = jahr - 2002 if ever_treat_cty_early == 1
tab year_cent, m
tab jahr, m
* Intermediate file, appended to the single-route sample further down
save data/train_controls_earlylate-early-entry.dta, replace
restore

* Same counties, late window: calendar years after 2002, event time centred on 2006.
preserve
keep if ever_treat_cty_early == 1 & ever_treat_cty_mid == 0 & ever_treat_cty_late == 1
keep if jahr > 2002
* Establishment enters before 2006 and is still observed after 2006
keep if entryyear < 2006 & max_year > 2006
gen year_cent = .
replace year_cent = jahr - 2006 if ever_treat_cty_late == 1 
tab year_cent, m
save data/train_controls_earlylate-late-entry.dta, replace
restore

//	9 counties are affected by routes 2 and 3: 
//	assign 2000-2005 to HH-BER, 0 = 2004; assign 2005-2010 to ING-NUR, 0 = 2006
* Counties with mid and late (but no early) treatment, mid window:
* calendar years before 2006, event time centred on 2004.
preserve
keep if ever_treat_cty_early == 0 & ever_treat_cty_mid == 1 & ever_treat_cty_late == 1
keep if jahr < 2006
* Establishment enters before 2004 and is still observed after 2004
keep if entryyear < 2004 & max_year > 2004
gen year_cent = .
replace year_cent = jahr - 2004 if ever_treat_cty_mid == 1 
tab year_cent, m
save data/train_controls_midlate-mid-entry.dta, replace
restore

* Same counties, late window: calendar years after 2004, event time centred on 2006.
preserve
keep if ever_treat_cty_early == 0 & ever_treat_cty_mid == 1 & ever_treat_cty_late == 1
keep if jahr > 2004
* Establishment enters before 2006 and is still observed after 2006
keep if entryyear < 2006 & max_year > 2006
gen year_cent = .
replace year_cent = jahr - 2006 if ever_treat_cty_late == 1 
tab year_cent, m
save data/train_controls_midlate-late-entry.dta, replace
restore

//	7 counties are affected by all routes: 
//	assign 2000-2003 to FFM-CLG, 0 = 2002; assign 2003-2005 to HH-BER, 0 = 2004; assign 2005-2010 to ING-NUR, 0 = 2006
* Counties treated by all three routes, early window:
* calendar years before 2004, event time centred on 2002.
preserve
keep if ever_treat_cty_early == 1 & ever_treat_cty_mid == 1 & ever_treat_cty_late == 1
keep if jahr < 2004
keep if entryyear < 2002 & max_year > 2002
gen year_cent = .
replace year_cent = jahr - 2002 if ever_treat_cty_early == 1
tab year_cent, m
save data/train_controls_earlymidlate-early-entry.dta, replace
restore

* Same counties, mid window: calendar years 2003 to 2005, event time centred on 2004.
preserve
keep if ever_treat_cty_early == 1 & ever_treat_cty_mid == 1 & ever_treat_cty_late == 1
keep if jahr > 2002 & jahr < 2006
keep if entryyear < 2004 & max_year > 2004
gen year_cent = .
replace year_cent = jahr - 2004 if ever_treat_cty_mid == 1 
tab year_cent, m
save data/train_controls_earlymidlate-mid-entry.dta, replace
restore

* Same counties, late window: calendar years after 2004, event time centred on 2006.
preserve
keep if ever_treat_cty_early == 1 & ever_treat_cty_mid == 1 & ever_treat_cty_late == 1
keep if jahr > 2004
keep if entryyear < 2006 & max_year > 2006
gen year_cent = .
replace year_cent = jahr - 2006 if ever_treat_cty_late == 1 
tab year_cent, m
save data/train_controls_earlymidlate-late-entry.dta, replace
restore

//	65 counties are affected by only one route
* Keep counties treated by exactly one route. Early-only and mid-only counties
* also require entry before and presence after the route year. The late-only
* branch has no entryyear condition here; entryyear < 2006 is already imposed
* in the data preparation step at the top of this file.
keep if ever_treat_cty_early == 1 & ever_treat_cty_mid == 0 & ever_treat_cty_late == 0 & entryyear < 2002 & max_year > 2002 | ///
	ever_treat_cty_early == 0 & ever_treat_cty_mid == 1 & ever_treat_cty_late == 0 & entryyear < 2004 & max_year > 2004 | ///
	ever_treat_cty_early == 0 & ever_treat_cty_mid == 0 & ever_treat_cty_late == 1 & max_year > 2006

* Event time relative to the route year of the single route in each county.
* The three replace conditions are mutually exclusive after the keep above.
gen year_cent = .
tab jahr if ever_treat_cty_early == 1
replace year_cent = jahr - 2002 if ever_treat_cty_early == 1
tab jahr if ever_treat_cty_mid == 1
replace year_cent = jahr - 2004 if ever_treat_cty_mid == 1 
tab jahr if ever_treat_cty_late == 1
replace year_cent = jahr - 2006 if ever_treat_cty_late == 1 

tab year_cent, m

* Stack the route-specific windows of the multi-route counties. Their calendar
* windows overlap, so an establishment-year can appear in more than one window.
append using data/train_controls_earlylate-early-entry.dta
append using data/train_controls_earlylate-late-entry.dta
append using data/train_controls_midlate-mid-entry.dta
append using data/train_controls_midlate-late-entry.dta
append using data/train_controls_earlymidlate-early-entry.dta
append using data/train_controls_earlymidlate-mid-entry.dta
append using data/train_controls_earlymidlate-late-entry.dta

tab year_cent, m

// annual dummies
* Event-time indicators. Offsets 1 to 8 are created first, then -6 to -1,
* which keeps the variable order in memory unchanged.
cap drop d_year_cent_*
forvalues k = 1/8 {
	gen d_year_cent_`k' = (year_cent == `k')
}
forvalues k = 6(-1)1 {
	gen d_year_cent_neg`k' = (year_cent == -`k')
}

* Suffixes shared by both sets of annual interactions (offset 0 has no dummy)
local rel_years neg6 neg5 neg4 neg3 neg2 neg1 1 2 3 4 5 6 7 8

* Annual interactions with ever_faster_any
cap drop int_cent_y*
foreach ry of local rel_years {
	gen int_cent_y`ry' = ever_faster_any * d_year_cent_`ry'
}

* Annual interactions with ever_faster1
cap drop int_1_cent_y*
foreach ry of local rel_years {
	gen int_1_cent_y`ry' = ever_faster1 * d_year_cent_`ry'
}

* Two-year event-time bins; offsets 0, 7 and 8 belong to no bin
gen d_bi_year_cent_neg56 = inlist(year_cent, -6, -5)
gen d_bi_year_cent_neg34 = inlist(year_cent, -4, -3)
gen d_bi_year_cent_neg12 = inlist(year_cent, -2, -1)

gen d_bi_year_cent_pos12 = inlist(year_cent, 1, 2)
gen d_bi_year_cent_pos34 = inlist(year_cent, 3, 4)
gen d_bi_year_cent_pos56 = inlist(year_cent, 5, 6)

* Suffixes shared by both sets of bin interactions
local bins neg56 neg34 neg12 pos12 pos34 pos56

* Bin interactions with ever_faster_any
cap drop int_bi_cent_y*
foreach b of local bins {
	gen int_bi_cent_y`b' = ever_faster_any * d_bi_year_cent_`b'
}

* Bin interactions with ever_faster1
cap drop int_1_bi_cent_y*
foreach b of local bins {
	gen int_1_bi_cent_y`b' = ever_faster1 * d_bi_year_cent_`b'
}

***	Run regression

* Everything up to the matching restore works on a temporary copy of the stacked data
preserve

* First calendar year of each establishment in the stacked data
bys betnr: egen first_year = min(jahr)
* Establishments first seen before 2002, event-time window -4 to 6
keep if first_year < 2002 & year_cent >= -4 & year_cent <= 6

count
count if flg_est == 1

* Baseline size: establishment mean of empl_bet over calendar years 2000 and 2001
gen aux_empl = empl_bet if jahr >= 2000 & jahr < 2002
bys betnr: egen empl_2000 = mean(aux_empl)
drop aux_empl

* Quartile cutoffs of baseline size, computed over all observations in memory
forvalues q = 25(25)75 {
	qui egen empl_perc_`q' = pctile(empl_2000), p(`q')
}
* Size class 25/50/75/100. The loop runs from 75 down to 25 so the smallest
* cutoff that applies is assigned last. Rows with missing empl_2000 stay missing.
qui gen perc_empl = 100 if empl_2000 < .
forval q = 75(-25)25 {
	qui replace perc_empl = `q' if empl_2000 <= empl_perc_`q' & empl_perc_`q' != .
}
drop empl_perc*

* Strata: county x size class x calendar year
* NOTE(review): egen group() returns missing when any component is missing, and
* bysort then treats all such rows as one group -- confirm that no row has a
* missing size class, otherwise those rows form a pseudo-stratum.
egen group_strata = group(ao_kreis perc_empl jahr)
* Per stratum: number of control and treated rows with a non-missing outcome
bysort group_strata: egen tmp1 = sum(ever_faster1 == 0 & ln_prdt_bet != .)
gen tmp2 = (tmp1 >0 )
bysort group_strata: egen tmp3 = sum(ever_faster1 == 1 & ln_prdt_bet != .)
gen tmp4 = (tmp3 >0 )

* Preliminary sample: strata that contain both a control and a treated row
cap drop aux_touse
gen aux_touse = (tmp2 > 0 & tmp4 > 0)

* Preliminary regression, run only to record its estimation sample
cap drop sample
qui reghdfe ln_prdt_bet ever_faster1 int_1_cent_y* if aux_touse == 1, absorb(betnr county_year) vce(robust)
qui gen sample = e(sample)

cap drop touse*
cap drop weight*

* Repeat the stratum check within the recorded estimation sample
bys group_strata: egen tmp7 = sum(ever_faster1 == 0 & sample == 1)
gen tmp8 = (tmp7 > 0)
by  group_strata: egen tmp9 = sum(ever_faster1 == 1 & sample == 1)
gen tmp10 = (tmp9 > 0)

* Final sample flag; this regression is displayed in the log
gen touse = (tmp8 > 0 & tmp10 > 0)
reghdfe ln_prdt_bet ever_faster1 int_1_cent_y* if touse == 1, absorb(betnr county_year) vce(robust)

* Stratum-level counts of treated and control rows in the estimation sample
cap drop n_treat_s n_ec_s
bysort group_strata: egen n_treat_s = sum(ever_faster1 == 1 & sample == 1)
bysort group_strata: egen n_ec_s = sum(ever_faster1 == 0 & sample == 1)

* Within-stratum weight: treated rows 1, control rows treated count over control count
gen aux_weight = (n_treat_s/n_ec_s) if ever_faster1 == 0 & aux_touse == 1 & sample == 1
replace aux_weight = 1 if ever_faster1 == 1 & aux_touse == 1 & sample == 1

* Overall numbers of treated and control rows in the final sample
sum ever_faster1 if ever_faster1 == 1 & touse == 1 & sample == 1
local n_treat = `r(N)'

sum ever_faster1 if ever_faster1 == 0 & touse == 1 & sample == 1
local n_ec = `r(N)' 

* Final weight: treated rows 1, control rows rescaled by n_ec / n_treat.
* NOTE(review): presumably so that control weights sum to the number of control rows -- confirm with the tabstat output.
gen weight = 1 if ever_faster1 == 1 & touse == 1 & sample == 1
replace weight = `n_ec'/`n_treat'*aux_weight if ever_faster1 == 0 & touse == 1 & sample == 1

tabstat weight if touse == 1 & sample == 1, c(s) s(N sum)

* Weighted regression on ln_empl_bet, run quietly; only its e(sample) is used for the tabstat below
qui reghdfe ln_empl_bet ever_faster1 int_1_cent_y* if touse == 1 [aweight=weight], absorb(betnr county_year) vce(robust)
tabstat weight if e(sample), c(s) s(N sum)

* Remove helper variables
drop aux_weight 
drop n_treat_s n_ec_s
drop tmp*

* One weighted regression per outcome on the two-year event-time bins,
* stored as vcebet1 to vcebet4.
local j = 1
foreach outcome in ln_prdt_bet avg_lnw_bot_bet count_mgmt_bet shr_bloss_w_bet {

	* Cap the outcome at its 99th and 1st percentiles. The percentiles are
	* computed over all non-missing rows in memory, not only over touse == 1,
	* and summarize is rerun after the upper cap has been applied.
	qui sum `outcome', det
	qui replace `outcome' = r(p99) if `outcome' > r(p99) & `outcome' != .
	qui sum `outcome', det
	qui replace `outcome' = r(p1) if `outcome' < r(p1) & `outcome' != .
	
	* Standard errors clustered by county (ao_kreis)
	eststo vcebet`j', title(`outcome'): ///
		qui reghdfe `outcome' ever_faster1 int_1_bi_cent_y* if touse == 1 [aweight=weight], absorb(betnr county_year) vce(cluster ao_kreis)
		* NOTE(review): e(K1) and e(K2) are taken from reghdfe and labelled as establishment and county counts -- confirm their meaning for the installed reghdfe version.
		qui estadd scalar count_bet = e(K1): vcebet`j'
		qui estadd scalar count_kreis = e(K2): vcebet`j'

	local j = `j' + 1
}

********************************************************************************
*** Table D.5, top left
* First table reports standard errors, second table reports p-values; both in TeX style
esttab vcebet1 vcebet2 vcebet3 vcebet4, ///
	b(%9.3f) se(%9.3f) star r2 obslast compress mtitles star(+ 0.20 ++ 0.10 * 0.05 ** 0.01 *** 0.001) stats(r2 N count_bet count_kreis, fmt(%9.3f %9.0f %9.0f %9.0f))  style(tex) label
esttab vcebet1 vcebet2 vcebet3 vcebet4, ///
	b(%9.3f) p(%9.3f) star r2 obslast compress mtitles star(+ 0.20 ++ 0.10 * 0.05 ** 0.01 *** 0.001) stats(r2 N count_bet count_kreis, fmt(%9.3f %9.0f %9.0f %9.0f))  style(tex) label

* Event-time distribution over the estimation sample currently held in e()
tab year_cent if e(sample), m

* Return to the stacked data as it was before the preceding preserve
restore

********************************************************************************
***	Figure VIIIa, Table D.5, top right
***	Pretrends, firms with at least two establishments, directly affected establishments

* Reload the prepared file so this section starts from the full prepared sample
use data/train_analysis_firm_pretrends.dta, clear

* NOTE(review): the section header describes firms with at least two
* establishments, but this condition keeps only count_est of 3 or more --
* confirm whether count_est >= 2 is intended or how count_est is defined.
keep if count_est > 2
count
* Latest calendar year in which each establishment (betnr) appears
bys betnr: egen max_year = max(jahr)

***	Build data set

//	15 counties are affected by routes 1 and 3: 
//	assign 2000-2005 to FFM-CLG, 0 = 2002; assign 2003-2010 to ING-NUR, 0 = 2006
* Counties with early and late (but no mid) treatment, early window:
* calendar years before 2006, event time centred on 2002.
preserve
keep if ever_treat_cty_early == 1 & ever_treat_cty_mid == 0 & ever_treat_cty_late == 1
keep if jahr < 2006
* Establishment enters before 2002 and is still observed after 2002
keep if entryyear < 2002 & max_year > 2002
gen year_cent = .
* The condition holds for every remaining row because of the first keep
replace year_cent = jahr - 2002 if ever_treat_cty_early == 1
tab year_cent, m
tab jahr, m
* Overwrites the intermediate file written by the full-sample section above
save data/train_controls_earlylate-early-entry.dta, replace
restore

* Same counties, late window: calendar years after 2002, event time centred on 2006.
preserve
keep if ever_treat_cty_early == 1 & ever_treat_cty_mid == 0 & ever_treat_cty_late == 1
keep if jahr > 2002
* Establishment enters before 2006 and is still observed after 2006
keep if entryyear < 2006 & max_year > 2006
gen year_cent = .
replace year_cent = jahr - 2006 if ever_treat_cty_late == 1 
tab year_cent, m
save data/train_controls_earlylate-late-entry.dta, replace
restore

//	9 counties are affected by routes 2 and 3: 
//	assign 2000-2005 to HH-BER, 0 = 2004; assign 2005-2010 to ING-NUR, 0 = 2006
* Counties with mid and late (but no early) treatment, mid window:
* calendar years before 2006, event time centred on 2004.
preserve
keep if ever_treat_cty_early == 0 & ever_treat_cty_mid == 1 & ever_treat_cty_late == 1
keep if jahr < 2006
* Establishment enters before 2004 and is still observed after 2004
keep if entryyear < 2004 & max_year > 2004
gen year_cent = .
replace year_cent = jahr - 2004 if ever_treat_cty_mid == 1 
tab year_cent, m
save data/train_controls_midlate-mid-entry.dta, replace
restore

* Same counties, late window: calendar years after 2004, event time centred on 2006.
preserve
keep if ever_treat_cty_early == 0 & ever_treat_cty_mid == 1 & ever_treat_cty_late == 1
keep if jahr > 2004
* Establishment enters before 2006 and is still observed after 2006
keep if entryyear < 2006 & max_year > 2006
gen year_cent = .
replace year_cent = jahr - 2006 if ever_treat_cty_late == 1 
tab year_cent, m
save data/train_controls_midlate-late-entry.dta, replace
restore

//	7 counties are affected by all routes: 
//	assign 2000-2003 to FFM-CLG, 0 = 2002; assign 2003-2005 to HH-BER, 0 = 2004; assign 2005-2010 to ING-NUR, 0 = 2006
* Counties treated by all three routes, early window:
* calendar years before 2004, event time centred on 2002.
preserve
keep if ever_treat_cty_early == 1 & ever_treat_cty_mid == 1 & ever_treat_cty_late == 1
keep if jahr < 2004
keep if entryyear < 2002 & max_year > 2002
gen year_cent = .
replace year_cent = jahr - 2002 if ever_treat_cty_early == 1
tab year_cent, m
save data/train_controls_earlymidlate-early-entry.dta, replace
restore

* Same counties, mid window: calendar years 2003 to 2005, event time centred on 2004.
preserve
keep if ever_treat_cty_early == 1 & ever_treat_cty_mid == 1 & ever_treat_cty_late == 1
keep if jahr > 2002 & jahr < 2006
keep if entryyear < 2004 & max_year > 2004
gen year_cent = .
replace year_cent = jahr - 2004 if ever_treat_cty_mid == 1 
tab year_cent, m
save data/train_controls_earlymidlate-mid-entry.dta, replace
restore

* Same counties, late window: calendar years after 2004, event time centred on 2006.
preserve
keep if ever_treat_cty_early == 1 & ever_treat_cty_mid == 1 & ever_treat_cty_late == 1
keep if jahr > 2004
keep if entryyear < 2006 & max_year > 2006
gen year_cent = .
replace year_cent = jahr - 2006 if ever_treat_cty_late == 1 
tab year_cent, m
save data/train_controls_earlymidlate-late-entry.dta, replace
restore

//	65 counties are affected by only one route
* Keep counties treated by exactly one route. Early-only and mid-only counties
* also require entry before and presence after the route year. The late-only
* branch has no entryyear condition here; entryyear < 2006 is already imposed
* in the data preparation step at the top of this file.
keep if ever_treat_cty_early == 1 & ever_treat_cty_mid == 0 & ever_treat_cty_late == 0 & entryyear < 2002 & max_year > 2002 | ///
	ever_treat_cty_early == 0 & ever_treat_cty_mid == 1 & ever_treat_cty_late == 0 & entryyear < 2004 & max_year > 2004 | ///
	ever_treat_cty_early == 0 & ever_treat_cty_mid == 0 & ever_treat_cty_late == 1 & max_year > 2006

* Event time relative to the route year of the single route in each county.
* The three replace conditions are mutually exclusive after the keep above.
gen year_cent = .
tab jahr if ever_treat_cty_early == 1
replace year_cent = jahr - 2002 if ever_treat_cty_early == 1
tab jahr if ever_treat_cty_mid == 1
replace year_cent = jahr - 2004 if ever_treat_cty_mid == 1 
tab jahr if ever_treat_cty_late == 1
replace year_cent = jahr - 2006 if ever_treat_cty_late == 1 

tab year_cent, m

* Stack the route-specific windows of the multi-route counties. Their calendar
* windows overlap, so an establishment-year can appear in more than one window.
append using data/train_controls_earlylate-early-entry.dta
append using data/train_controls_earlylate-late-entry.dta
append using data/train_controls_midlate-mid-entry.dta
append using data/train_controls_midlate-late-entry.dta
append using data/train_controls_earlymidlate-early-entry.dta
append using data/train_controls_earlymidlate-mid-entry.dta
append using data/train_controls_earlymidlate-late-entry.dta

tab year_cent, m

// annual dummies
* Event-time indicators. Offsets 1 to 8 are created first, then -6 to -1,
* which keeps the variable order in memory unchanged.
cap drop d_year_cent_*
forvalues k = 1/8 {
	gen d_year_cent_`k' = (year_cent == `k')
}
forvalues k = 6(-1)1 {
	gen d_year_cent_neg`k' = (year_cent == -`k')
}

* Suffixes shared by both sets of annual interactions (offset 0 has no dummy)
local rel_years neg6 neg5 neg4 neg3 neg2 neg1 1 2 3 4 5 6 7 8

* Annual interactions with ever_faster_any
cap drop int_cent_y*
foreach ry of local rel_years {
	gen int_cent_y`ry' = ever_faster_any * d_year_cent_`ry'
}

* Annual interactions with ever_faster1
cap drop int_1_cent_y*
foreach ry of local rel_years {
	gen int_1_cent_y`ry' = ever_faster1 * d_year_cent_`ry'
}

* Two-year event-time bins; offsets 0, 7 and 8 belong to no bin
gen d_bi_year_cent_neg56 = inlist(year_cent, -6, -5)
gen d_bi_year_cent_neg34 = inlist(year_cent, -4, -3)
gen d_bi_year_cent_neg12 = inlist(year_cent, -2, -1)

gen d_bi_year_cent_pos12 = inlist(year_cent, 1, 2)
gen d_bi_year_cent_pos34 = inlist(year_cent, 3, 4)
gen d_bi_year_cent_pos56 = inlist(year_cent, 5, 6)

* Suffixes shared by both sets of bin interactions
local bins neg56 neg34 neg12 pos12 pos34 pos56

* Bin interactions with ever_faster_any
cap drop int_bi_cent_y*
foreach b of local bins {
	gen int_bi_cent_y`b' = ever_faster_any * d_bi_year_cent_`b'
}

* Bin interactions with ever_faster1
cap drop int_1_bi_cent_y*
foreach b of local bins {
	gen int_1_bi_cent_y`b' = ever_faster1 * d_bi_year_cent_`b'
}

***	Run regression

* Everything up to the matching restore works on a temporary copy of the stacked data
preserve

* First calendar year of each establishment in the stacked data
bys betnr: egen first_year = min(jahr)
* Establishments first seen before 2002, event-time window -4 to 6
keep if first_year < 2002 & year_cent >= -4 & year_cent <= 6

count
count if flg_est == 1

* Baseline size: establishment mean of empl_bet over calendar years 2000 and 2001
gen aux_empl = empl_bet if jahr >= 2000 & jahr < 2002
bys betnr: egen empl_2000 = mean(aux_empl)
drop aux_empl

* Quartile cutoffs of baseline size, computed over all observations in memory
forvalues q = 25(25)75 {
	qui egen empl_perc_`q' = pctile(empl_2000), p(`q')
}
* Size class 25/50/75/100. The loop runs from 75 down to 25 so the smallest
* cutoff that applies is assigned last. Rows with missing empl_2000 stay missing.
qui gen perc_empl = 100 if empl_2000 < .
forval q = 75(-25)25 {
	qui replace perc_empl = `q' if empl_2000 <= empl_perc_`q' & empl_perc_`q' != .
}
drop empl_perc*

* Strata: county x size class x calendar year
* NOTE(review): egen group() returns missing when any component is missing, and
* bysort then treats all such rows as one group -- confirm that no row has a
* missing size class, otherwise those rows form a pseudo-stratum.
egen group_strata = group(ao_kreis perc_empl jahr)
* Per stratum: number of control and treated rows with a non-missing outcome
bysort group_strata: egen tmp1 = sum(ever_faster1 == 0 & ln_prdt_bet != .)
gen tmp2 = (tmp1 >0 )
bysort group_strata: egen tmp3 = sum(ever_faster1 == 1 & ln_prdt_bet != .)
gen tmp4 = (tmp3 >0 )

* Preliminary sample: strata that contain both a control and a treated row
cap drop aux_touse
gen aux_touse = (tmp2 > 0 & tmp4 > 0)

* Preliminary regression, run only to record its estimation sample
cap drop sample
qui reghdfe ln_prdt_bet ever_faster1 int_1_cent_y* if aux_touse == 1, absorb(betnr county_year) vce(robust)
qui gen sample = e(sample)

cap drop touse*
cap drop weight*

* Repeat the stratum check within the recorded estimation sample
bys group_strata: egen tmp7 = sum(ever_faster1 == 0 & sample == 1)
gen tmp8 = (tmp7 > 0)
by  group_strata: egen tmp9 = sum(ever_faster1 == 1 & sample == 1)
gen tmp10 = (tmp9 > 0)

* Final sample flag; unlike the full-sample section, this regression runs quietly
gen touse = (tmp8 > 0 & tmp10 > 0)
qui reghdfe ln_prdt_bet ever_faster1 int_1_cent_y* if touse == 1, absorb(betnr county_year) vce(robust)

* Stratum-level counts of treated and control rows in the estimation sample
cap drop n_treat_s n_ec_s
bysort group_strata: egen n_treat_s = sum(ever_faster1 == 1 & sample == 1)
bysort group_strata: egen n_ec_s = sum(ever_faster1 == 0 & sample == 1)

* Within-stratum weight: treated rows 1, control rows treated count over control count
gen aux_weight = (n_treat_s/n_ec_s) if ever_faster1 == 0 & aux_touse == 1 & sample == 1
replace aux_weight = 1 if ever_faster1 == 1 & aux_touse == 1 & sample == 1

* Overall numbers of treated and control rows in the final sample
sum ever_faster1 if ever_faster1 == 1 & touse == 1 & sample == 1
local n_treat = `r(N)'

sum ever_faster1 if ever_faster1 == 0 & touse == 1 & sample == 1
local n_ec = `r(N)' 

* Final weight: treated rows 1, control rows rescaled by n_ec / n_treat.
* NOTE(review): presumably so that control weights sum to the number of control rows -- confirm with the tabstat output.
gen weight = 1 if ever_faster1 == 1 & touse == 1 & sample == 1
replace weight = `n_ec'/`n_treat'*aux_weight if ever_faster1 == 0 & touse == 1 & sample == 1

tabstat weight if touse == 1 & sample == 1, c(s) s(N sum)

* Weighted regression on ln_empl_bet, run quietly; only its e(sample) is used for the tabstat below
qui reghdfe ln_empl_bet ever_faster1 int_1_cent_y* if touse == 1 [aweight=weight], absorb(betnr county_year) vce(robust)
tabstat weight if e(sample), c(s) s(N sum)

* Remove helper variables
drop aux_weight 
drop n_treat_s n_ec_s
drop tmp*

* One weighted regression per outcome on the two-year event-time bins,
* stored as vcebet1 to vcebet4.
local j = 1
foreach outcome in ln_prdt_bet avg_lnw_bot_bet count_mgmt_bet shr_bloss_w_bet {

	* Cap the outcome at its 99th and 1st percentiles. The percentiles are
	* computed over all non-missing rows in memory, not only over touse == 1,
	* and summarize is rerun after the upper cap has been applied.
	qui sum `outcome', det
	qui replace `outcome' = r(p99) if `outcome' > r(p99) & `outcome' != .
	qui sum `outcome', det
	qui replace `outcome' = r(p1) if `outcome' < r(p1) & `outcome' != .
	

	* Standard errors clustered by county (ao_kreis)
	eststo vcebet`j', title(`outcome'): ///
		qui reghdfe `outcome' ever_faster1 int_1_bi_cent_y* if touse == 1 [aweight=weight], absorb(betnr county_year) vce(cluster ao_kreis)
		* NOTE(review): e(K1) and e(K2) are taken from reghdfe and labelled as establishment and county counts -- confirm their meaning for the installed reghdfe version.
		qui estadd scalar count_bet = e(K1): vcebet`j'
		qui estadd scalar count_kreis = e(K2): vcebet`j'

	local j = `j' + 1
}

********************************************************************************
*** Figure VIIIa, Table D.5, top right
* First table reports standard errors, second table reports p-values; both in TeX style
esttab vcebet1 vcebet2 vcebet3 vcebet4, ///
	b(%9.3f) se(%9.3f) star r2 obslast compress mtitles star(+ 0.20 ++ 0.10 * 0.05 ** 0.01 *** 0.001) stats(r2 N count_bet count_kreis, fmt(%9.3f %9.0f %9.0f %9.0f))  style(tex) label
esttab vcebet1 vcebet2 vcebet3 vcebet4, ///
	b(%9.3f) p(%9.3f) star r2 obslast compress mtitles star(+ 0.20 ++ 0.10 * 0.05 ** 0.01 *** 0.001) stats(r2 N count_bet count_kreis, fmt(%9.3f %9.0f %9.0f %9.0f))  style(tex) label

* Event-time distribution over the estimation sample currently held in e()
tab year_cent if e(sample), m

* Return to the stacked data as it was before the preceding preserve
restore

* Close the log opened at the top of the file
log close
